#NOTE: All analyses are done using R studio (Version 4.4.2)


# First, Install all essential packages for the analyses

install.packages (c("tidyverse", "janitor", "sandwich", "lmtest", "clubSandwich", "stargazer", "margins", "ggplot2", "extrafont", "patchwork", "broom", "car", "knitr"))

# Load essential packages for data cleaning
library(tidyverse)

library(janitor)

# SET YOUR WORKING DIRECTORY

# Check where working directory is

getwd()

# Set working directorate

setwd("C:/Users/YourName/Documents/R_Projects/MyAnalysis") # Windows example
setwd("~/Documents/R_Projects/MyAnalysis") # Mac example

# Note: 
# R prefers forward slashes (/) even on Windows
# If you copy a path from Windows Explorer, it will use backslashes (\)

# Load the ANES 2020 data

anes_2020_data <- read_csv("Anes 2020 CSV Data.csv")

# Rename essential variables in the data for clarity in the analyses 

anes_2020_data <- rename(anes_2020_data, Race = V201549x, Voted = V202109x, Age = V201507x, Education = V201511x, Gender = V201600, Income = V201617x, Region = V203003, Therm_Rep = V201157, Therm_Dem = V201156) #Therm_Rep stands for feeling thermometer for republican party and Therm_Dem stands for feeling thermometer for democratic party.
#Note: Please refer to ANES code book for reference on the variables numbers that are being renamed.

# Subset essential variables for analysis

anes_2020_data <- select (anes_2020_data, Race, Voted, Age, Education, Income, Region, Gender, Therm_Rep, Therm_Dem) %>%
  filter(Race == 2)%>%
  filter(Voted != -2) %>%
  filter(Age != -9) %>%
  filter(Income != -9) %>%
  filter(Education != -2) %>%
  filter(Education != -8) %>%
  filter(Education != -9) %>%
  filter(Gender != -9) %>%
  filter(Therm_Rep != -9) %>%
  filter(Therm_Dem != -9)

#Note: -2, -5, -6, -7, -8, and -9 have been filtered out because respondents did not complete that part of the survey, it was incomplete, missing values, no post election interview, refused to answer, don't know or due to technical errors respectively according to the ANES codebook.

# Code region with fixed effects to control for variations or differences in voter turnout
anes_2020_data <- mutate(anes_2020_data, Region = as.factor(Region)) # Treat region as categorical for fixed effects (dummy variables)

# Code Turnout (Voted), and Gender as binary (Yes or No, 1 or 0)
anes_2020_data <- mutate(anes_2020_data, Voted = if_else(Voted == 1, 1, 0))

anes_2020_data <- mutate(anes_2020_data, Gender = if_else(Gender == 1, 1, 0))

# Reverse code feeling thermometer for Republican Hostility

anes_2020_data <- mutate (anes_2020_data, Republican_Hostility = if_else (Therm_Rep >= 0, 100 - Therm_Rep, NA_real_))
hist(anes_2020_data$Republican_Hostility) #Overview of the distribution

# Mean and Standard Deviation FOR GOP HOSTILITY
mean(anes_2020_data$Republican_Hostility)
sd(anes_2020_data$Republican_Hostility)

# Reverse code feeling thermometer for Democratic warmth (positive partisanship)

anes_2020_data <- mutate (anes_2020_data, Democratic_Warmth = if_else(Therm_Dem >= 0, 100 - Therm_Dem, NA_real_))
hist(anes_2020_data$Democratic_Warmth) #Overview of the Distribution

# Mean and Standard Deviation FOR DEMOCRATIC WARMTH
mean(anes_2020_data$Democratic_Warmth)
sd(anes_2020_data$Democratic_Warmth)

# Visualize the Distribution of Republican Hostility and Democratic Warmth on a Histogram
Histogram_Rep_Hostility <- ggplot(anes_2020_data, aes(Republican_Hostility)) + geom_histogram(bins = 17) + xlab("Hostile Feeling") + ylab("Frequency") + theme_classic()

Histogram_Rep_Hostility + theme(
  axis.title.x = element_text(family = "serif", size = 12),  # X-axis title
  axis.title.y = element_text(family = "serif", size = 12),  # Y-axis title
  axis.text.x = element_text(family = "serif", size = 12),   # X-axis tick labels
  axis.text.y = element_text(family = "serif", size = 12)    # Y-axis tick labels
) + theme(panel.grid = element_blank())


Histogram_Dem_Warmth <- ggplot(anes_2020_data, aes(Democratic_Warmth)) + geom_histogram(bins = 17) + xlab("Warmth Feeling") + ylab("Frequency") + theme_classic()

Histogram_Dem_Warmth + theme(
  axis.title.x = element_text(family = "serif", size = 12),  # X-axis title
  axis.title.y = element_text(family = "serif", size = 12),  # Y-axis title
  axis.text.x = element_text(family = "serif", size = 12),   # X-axis tick labels
  axis.text.y = element_text(family = "serif", size = 12)    # Y-axis tick labels
) + theme(panel.grid = element_blank())


# Center Republican Hostility and Democratic Warmth at their means for interactions and scaling across models
anes_2020_data <- mutate(anes_2020_data, Republican_Hostility = Republican_Hostility - mean(Republican_Hostility, na.rm = TRUE))

anes_2020_data <- mutate(anes_2020_data, Democratic_Warmth = Democratic_Warmth - mean(Democratic_Warmth, na.rm = TRUE))


# Code Affective Polarization Index
anes_2020_data <- mutate(anes_2020_data, Affective_Pol_Index = Republican_Hostility - Democratic_Warmth)
hist(anes_2020_data$Affective_Pol_Index)


# Set up libraries and load essential packages for bivariate and multivariate analysis 

library(sandwich)
library(lmtest)
library(stargazer)

# MODEL SPECIFICATIONS

# Model 1: Republican Hostility only
model1 <- glm(Voted ~ Republican_Hostility, data = anes_2020_data, family = "binomial")

model1_clustered_se <- vcovCL(model1, cluster = ~Region, type = "HC1") #Clusters standard errors around region

# Model 2: Democratic Warmth only
model2 <- glm(Voted ~ Democratic_Warmth, data = anes_2020_data, family = "binomial")

model2_clustered_se <- vcovCL(model2, cluster = ~Region, type = "HC1")

# Model 3: Full model (now with clustered SEs)
model3 <- glm(Voted ~ Republican_Hostility + Democratic_Warmth + Age + Gender + Education + Income + as.factor(Region), data = anes_2020_data, family = "binomial")
model3_clustered_se <- vcovCL(model3, cluster = ~Region, type = "HC1")

# Model 4: Robustness check (Replicating finding with Affective Polarization Index)
model4 <- glm(Voted ~ Affective_Pol_Index + Age + Gender + Education + Income + as.factor(Region), data = anes_2020_data, family = "binomial")
model4_clustered_se <- vcovCL(model4, cluster = ~Region, type = "HC1")

# Including Model Fit Statistics:

# McFadden's Pseudo R²
pseudo_r2 <- function(model) {round(1 - (model$deviance / model$null.deviance), 3)}

fit_stats <- data.frame(
  Model = c("Baseline (GOP)", "Baseline (Dem)", "Full Model", "Robustness Check"),
  AIC = c(AIC(model1), AIC(model2), AIC(model3), AIC(model4)),
  BIC = c(BIC(model1), BIC(model2), BIC(model3), BIC(model4)),
  Pseudo_R2 = c(pseudo_r2(model1), pseudo_r2(model2), 
                pseudo_r2(model3), pseudo_r2(model4)),
  N = c(nobs(model1), nobs(model2), nobs(model3), nobs(model4))
)


# MAIN REGRESSION TABLE

# Prepare standard error list (all clustered)
se_list <- list(
  sqrt(diag(model1_clustered_se)),
  sqrt(diag(model2_clustered_se)),
  sqrt(diag(model3_clustered_se)),
  sqrt(diag(model4_clustered_se))
)

stargazer(model1, model2, model3, model4,
          apply.coef = exp,  # Convert to Odds Ratios
          se = se_list,      # All models use clustered SEs
          type = "text",     # Change to "latex" for paper submission
          title = "Logistic Regression Models of Black Voting Behavior in 2020 (Odds Ratios)",
          dep.var.caption = "Outcome: Voted (1 = Yes, 0 = No)",
          dep.var.labels = "",
          column.labels = c("Republican Baseline Model (M1)", "Democratic Baseline Model (M2)", "Full Model (M3)", "Robustness Model(4)"),
          covariate.labels = c(
            "Republican Hostility", 
            "Democratic Warmth",
            "Affective Polarization Index",
            "Age", 
            "Gender (Female)", 
            "Education", 
            "Income",
            "Midwest (ref: Northeast)",
            "South (ref: Northeast)",
            "West (ref: Northeast)"
          ),
          omit.stat = c("LL", "ser", "n"),
          add.lines = list(
            c("Region Fixed Effects", "No", "No", "Yes", "Yes"),
            c("Pseudo R²", fit_stats$Pseudo_R2),
            c("AIC", fit_stats$AIC),
            c("BIC", fit_stats$BIC),
            c("Observations", fit_stats$N)
          ),
          star.cutoffs = c(0.05, 0.01, 0.001),
          notes = c(
            "Odds ratios reported with clustered standard errors (by region) in parentheses.",
            "Reference category for regions: Northeast.",
            "***p<0.01, **p<0.05, *p<0.1"
          ))




#PROBABILITY UNIT REGRESSION (Probit Model to replicate findings)

model5 <- glm(Voted ~ Republican_Hostility + Democratic_Warmth + Age + Gender + Income + as.factor(Region), data = anes_2020_data, family = binomial(link = "probit"))
summary(model5)

model5_clustered_se <- vcovCL(model5, cluster = ~Region)
coeftest(model5, vcov. = model5_clustered_se)

# Load essential packages
library(margins)
library(ggplot2)

margins(model5, variables = c("Republican_Hostility", "Democratic_Warmth"))  

probit_margins <- margins(model5, variables = c("Republican_Hostility", "Democratic_Warmth")) #Calculating marginal effects for probit
summary(probit_margins)


# Getting Average Marginal effects results from model5
mfx <- margins(model5)
mfx_df <- as.data.frame(mfx)

names(mfx_df) #Checking the names of the variables in model5.

# Plotting and visualizing Average Marginal Effects (AME) obtained from model6
plot_df <- data.frame(
  Predictor = c("Republican_Hostility", "Democratic_Warmth"),
  AME = c(mfx_df$dydx_Republican_Hostility[1], mfx_df$dydx_Democratic_Warmth[1]),
  SE = c(sqrt(mfx_df$Var_dydx_Republican_Hostility[1]), sqrt(mfx_df$Var_dydx_Democratic_Warmth[1]))
)

# Plotting AME in Font Size 12 with color aesthetics
library(ggplot2)
library(extrafont)  # For custom fonts

# Load system fonts (For Times New Roman font)
loadfonts(device = "win")  # Use "win" for Windows; "quartz" for Mac

# Create the plot with custom styling
ggplot(plot_df, aes(x = Predictor, y = AME, 
                    color = Predictor)) +
  geom_point(size = 4) +
  geom_errorbar(aes(ymin = AME - 1.96 * SE, 
                    ymax = AME + 1.96 * SE), 
                width = 0.2, 
                size = 0.8) +
  geom_hline(yintercept = 0, 
             linetype = "dashed", 
             color = "black",
             size = 0.5) +
  geom_text(aes(label = sprintf("%.3f", AME)), 
            vjust = -1.5, 
            size = 4, 
            color = "black",
            family = "Times New Roman") +
  scale_color_manual(values = c("Republican_Hostility" = "#8B0000",  # Wine red
                                "Democratic_Warmth" = "#87CEEB")) +   # Sky blue
  scale_x_discrete(labels = c("Republican_Hostility" = "Republican Hostility",
                              "Democratic_Warmth" = "Democratic Warmth")) +
  labs(
    title = "Average Marginal Effects from Probit Model",
    x = NULL,
    y = "Average Marginal Effect on\nProbability of Voting"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.text = element_text(color = "black"),
    axis.title.y = element_text(margin = margin(r = 10)),
    panel.grid.major.x = element_blank(),
    legend.position = "none"
  )

#High resolution (600 dpi)
ggsave("probit_marginal_effects.tiff", 
       width = 7, height = 5, units = "in",
       dpi = 600, 
       compression = "lzw")  # Lossless compression



#COMPARING VOTER TURNOUT PATTERNS IN DIFFERENT REGIONS

#Load essential packages (if you have not)
library(ggplot2)
library(patchwork)
library(broom)
library(extrafont)


# Load Times New Roman (ensure it's installed on your system)
loadfonts(device = "win")  # Use "win" for Windows; "quartz" for Mac

# Plot custom turnout patterns in regions with 
p3 <- tidy(model3) %>%
  filter(str_detect(term, "Region")) %>%
  mutate(Region = case_when(
    str_detect(term, "2") ~ "Midwest",
    str_detect(term, "3") ~ "South",
    str_detect(term, "4") ~ "West",
    TRUE ~ "Northeast"
  )) %>%
  ggplot(aes(x = Region, y = exp(estimate), color = Region)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = exp(estimate - 1.96*std.error),
                    ymax = exp(estimate + 1.96*std.error)), width = 0.2) +
  geom_hline(yintercept = 1, linetype = "dashed") +
  labs(
    y = "Odds Ratio"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman", size = 12),  # Set font
    plot.title = element_text(face = "bold", hjust = 0.5),      # Center title
    axis.title = element_text(size = 12),
    legend.text = element_text(size = 11)
  )

# Save as high-resolution (900 dpi)
ggsave("regional_odds_ratios.png", p3, dpi = 900, width = 6, height = 4)



# ROBUSTNESS CHECKS

#Confidence Interval (CI) width and Null Effects

library(ggplot2)
library(broom)
library(extrafont)

# Load Times New Roman (run once if never used)
loadfonts(device = "win")  # Use "quartz" for Mac

# Generate custom plot with ONLY Republican_Hostility and Democratic_Warmth
robustness_plot <- tidy(model3) %>%  # Replace 'model' with your actual model object
  filter(term %in% c("Republican_Hostility", "Democratic_Warmth")) %>%
  ggplot(aes(
    x = term, 
    y = estimate, 
    ymin = estimate - 1.96 * std.error, 
    ymax = estimate + 1.96 * std.error
  )) +
  geom_point(size = 3) +
  geom_errorbar(width = 0.2) +
  geom_hline(
    yintercept = 0, 
    linetype = "dashed", 
    color = "red", 
    linewidth = 0.7
  ) +
  scale_x_discrete(
    labels = c(
      "Republican_Hostility" = "GOP Hostility", 
      "Democratic_Warmth" = "Dem. Warmth"
    )
  ) +
  labs(
    title = "Robustness Check: Key Coefficients with 95% CIs",
    x = "Predictor Variables",
    y = "Log-Odds Estimate",
    caption = "Dashed line indicates null effect (β = 0)."
  ) +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(
      face = "bold", 
      hjust = 0.5,
      size = 12,
      margin = margin(b = 10)
    ),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 11, color = "black"),
    panel.grid.major.x = element_blank(),
    plot.caption = element_text(hjust = 0, size = 10)
  )

# Save as 900 DPI PNG
ggsave(
  "robustness_check_final.png", 
  robustness_plot, 
  dpi = 900, 
  width = 6.5, 
  height = 4.5, 
  units = "in"
)

# SENSITIVITY ANALYSIS

#classical, robust, and clustered standard errors

library(sandwich)
library(lmtest)
library(ggplot2)
library(extrafont)

# Calculate all error types using model3
results <- list(
  Classical = coeftest(model3),
  Robust = coeftest(model3, vcov = vcovHC(model3, type = "HC1")),
  Clustered = coeftest(model3, vcov = vcovCL(model3, cluster = ~Region, type = "HC1"))
)

# Create comparison data frame with proper region labels
results_compare <- data.frame(
  Variable = names(coef(model3)),
  Coef = coef(model3),
  Classic_SE = summary(model3)$coefficients[, 2],
  Robust_SE = results$Robust[, 2],
  Cluster_SE = results$Clustered[, 2]
)

# Clean up variable names and relabel regions
results_compare <- results_compare %>%
  mutate(
    Variable = case_when(
      Variable == "as.factor(Region)2" ~ "Midwest",
      Variable == "as.factor(Region)3" ~ "South",
      Variable == "as.factor(Region)4" ~ "West",
      Variable == "Republican_Hostility" ~ "GOP Hostility",
      Variable == "Democratic_Warmth" ~ "Dem. Warmth",
      Variable == "(Intercept)" ~ "Intercept",
      TRUE ~ Variable
    )
  )

#Note: Refer to ANES 2020 dataset codebook for regional representations.

# Prepare data for plotting
plot_data <- results_compare %>%
  pivot_longer(cols = c(Classic_SE, Robust_SE, Cluster_SE),
               names_to = "SE_Type",
               values_to = "SE_Value") %>%
  mutate(SE_Type = factor(SE_Type,
                          levels = c("Classic_SE", "Robust_SE", "Cluster_SE"),
                          labels = c("Classical", "Robust", "Clustered")))

# Load Times New Roman font for custom plotting
loadfonts(device = "win")  # Use "quartz" for Mac

# Generate custom sensitivity plot with Times New Roman
ggplot(plot_data, aes(x = Variable, y = SE_Value, color = SE_Type)) +
  geom_point(size = 3, position = position_dodge(width = 0.5)) +
  coord_flip() +
  scale_color_manual(values = c("#1f77b4", "#ff7f0e", "#2ca02c")) +
  labs(title = "Comparison of Standard Error Estimates",
       y = "Standard Error",
       x = "",
       color = "Error Type") +
  theme_minimal(base_size = 12) +
  theme(
    text = element_text(family = "Times New Roman"),
    plot.title = element_text(face = "bold", hjust = 0.5, size = 12),
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    legend.position = "bottom",
    legend.text = element_text(size = 11)
  )

# Save as high-resolution PNG
ggsave("sensitivity_plot.png", dpi = 900, width = 8, height = 6, units = "in")


#CHECK FOR MULTICOLINEARITY ISSUES (Generalized Variance Inflation Factors- GVIF)

#Load "car" package
library(car)

vif(model1) #single predictor, making VIF computation unnecessary.
vif(model2) #single predictor, making VIF computation unnecessary.
vif(model3) #Shows no multicollinearity issues.
vif(model4) #Shows no serious multicollinearity issues.

#Load knitr package to display result in a table
library(knitr)

#Prepare VIF results for table
vif_results <- data.frame(
  Variable = c("Republican Hostility", "Democratic Warmth", "Affective Pol. Index", 
               "Age", "Gender", "Education", "Income", "Region"),
  Model3_GVIF = c(1.08, 1.16, NA, 1.03, 1.03, 1.19, 1.20, 1.06),
  Model4_GVIF = c(NA, NA, 1.04, 1.03, 1.03, 1.17, 1.19, 1.05)
)

# Generate a table of the VIF
kable(vif_results, 
      caption = "Table X. Generalized Variance Inflation Factors (GVIF) by Model",
      col.names = c("Variable", "Model 3 GVIF", "Model 4 GVIF"),
      digits = 2,
      align = "lcc")
